import numpy as np
from tqdm import tqdm

# NOTE(review): these ids are stored as ints, whereas user ids parsed from the
# interaction file are strings; convert before comparing the two.
neg_user = [13883189723,18883337780]# exclude the two users that have more than 6000 interactions


# Raw interaction file (tab-separated lines; user in column 0, item in column 1).
path1 = 'F:\\LCW\\PycharmProject\\pytorch-learn\\RecSys\\dataset\\negSample\\patent-240k.txt'
# File listing the item ids to keep, one id per line.
path2 = 'F:\\LCW\\PycharmProject\\pytorch-learn\\RecSys\\dataset\\patent-100k\\patent_120k_patent.txt'
# Load the set of item ids to keep: one id per line of the file at path2.
with open(path2, 'r') as g:
    data_g = g.readlines()
# The id is everything before the first newline character of each line.
item_list_g = {row.partition('\n')[0] for row in tqdm(data_g)}




# The initial patent-240k interaction file.
# Builds user_item: user id -> list of distinct items in first-seen order,
# keeping only items present in item_list_g and skipping users in neg_user.
with open(path1, 'r') as f:
    data = f.readlines()
    user_item = dict()
    item_list = set()  # not filled in this section; kept because it is a module-level name
    # User ids parsed from the file are strings while neg_user holds ints, so
    # normalise the exclusion list to strings; comparing str against int would
    # never match and the filter would silently let every user through.
    excluded_users = {str(u) for u in neg_user}
    # (user, item) pairs already recorded: O(1) duplicate check instead of
    # scanning the per-user list on every line.
    seen_pairs = set()
    for line in tqdm(data):
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing empty line)
        fields = line.split('\t')
        user = fields[0]
        item = fields[1]
        rating = fields[2]  # parsed but not used below
        if item not in item_list_g or user in excluded_users:
            continue
        if (user, item) in seen_pairs:
            continue
        seen_pairs.add((user, item))
        user_item.setdefault(user, []).append(item)

# Append one "user<TAB>item<TAB>1" line per retained (user, item) pair.
with open('write.txt', 'a') as w:
    for user in tqdm(user_item):
        w.writelines(
            '\t'.join((user, str(pos), '1')) + '\n'
            for pos in user_item[user]
        )



